@@ -61,7 +61,10 @@ module Agents
     end
 
     def validate_options
-      errors.add(:base, "url, expected_update_period_in_days, and extract are required") unless options[:expected_update_period_in_days].present? && options[:url].present? && options[:extract].present?
+      errors.add(:base, "url and expected_update_period_in_days are required") unless options[:expected_update_period_in_days].present? && options[:url].present?
+      if !options[:extract].present? && options[:type] != "json"
+        errors.add(:base, "extract is required for all types except json")
+      end
     end
 
     def check
@@ -74,45 +77,54 @@ module Agents
       request.on_success do |response|
         doc = parse(response.body)
         output = {}
-        options[:extract].each do |name, extraction_details|
-          result = if extraction_type == "json"
-            output[name] = Utils.values_at(doc, extraction_details[:path])
-          else
-            output[name] = doc.css(extraction_details[:css]).map { |node|
-              if extraction_details[:attr]
-                node.attr(extraction_details[:attr])
-              elsif extraction_details[:text]
-                node.text()
-              else
-                error ":attr or :text is required on HTML or XML extraction patterns"
-                return
-              end
-            }
-          end
-          log "Extracting #{extraction_type} at #{extraction_details[:path] || extraction_details[:css]}: #{result}"
-        end
-
-        num_unique_lengths = options[:extract].keys.map { |name| output[name].length }.uniq
-
-        if num_unique_lengths.length != 1
-          error "Got an uneven number of matches for #{options[:name]}: #{options[:extract].inspect}"
-          return
-        end
-
         previous_payloads = events.order("id desc").limit(UNIQUENESS_LOOK_BACK).pluck(:payload).map(&:to_json) if options[:mode].to_s == "on_change"
-        num_unique_lengths.first.times do |index|
-          result = {}
-          options[:extract].keys.each do |name|
-            result[name] = output[name][index]
-            if name.to_s == 'url'
-              result[name] = URI.join(options[:url], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
-            end
-          end
 
+        if extraction_type == "json" && !options[:extract].present?
+          result = doc
           if !options[:mode] || options[:mode].to_s == "all" || (options[:mode].to_s == "on_change" && !previous_payloads.include?(result.to_json))
             log "Storing new result for '#{name}': #{result.inspect}"
            create_event :payload => result
          end
+        else
+          options[:extract].each do |name, extraction_details|
+            result = if extraction_type == "json"
+              output[name] = Utils.values_at(doc, extraction_details[:path])
+            else
+              output[name] = doc.css(extraction_details[:css]).map { |node|
+                if extraction_details[:attr]
+                  node.attr(extraction_details[:attr])
+                elsif extraction_details[:text]
+                  node.text()
+                else
+                  error ":attr or :text is required on HTML or XML extraction patterns"
+                  return
+                end
+              }
+            end
+            log "Extracting #{extraction_type} at #{extraction_details[:path] || extraction_details[:css]}: #{result}"
+          end
+
+          num_unique_lengths = options[:extract].keys.map { |name| output[name].length }.uniq
+
+          if num_unique_lengths.length != 1
+            error "Got an uneven number of matches for #{options[:name]}: #{options[:extract].inspect}"
+            return
+          end
+
+          num_unique_lengths.first.times do |index|
+            result = {}
+            options[:extract].keys.each do |name|
+              result[name] = output[name][index]
+              if name.to_s == 'url'
+                result[name] = URI.join(options[:url], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
+              end
+            end
+
+            if !options[:mode] || options[:mode].to_s == "all" || (options[:mode].to_s == "on_change" && !previous_payloads.include?(result.to_json))
+              log "Storing new result for '#{name}': #{result.inspect}"
+              create_event :payload => result
+            end
+          end
        end
      end
      hydra.queue request
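
In practical terms, this change lets a `json`-type agent omit `extract` entirely: `check` then takes the new branch, sets `result = doc`, and stores the whole parsed response as the event payload. A minimal sketch of such a configuration (the agent name and URL here are placeholders, not taken from this patch):

```ruby
# Illustrative options only; the name and URL are hypothetical.
site = {
  :name => "Some JSON API",
  :expected_update_period_in_days => 2,
  :type => "json",
  :url => "http://example.com/status.json",  # any endpoint returning JSON
  :mode => :on_change                        # optional; "all" also works
}
# With :extract left out, each created event's payload is the entire
# parsed JSON document rather than per-key extractions.
```
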
@@ -155,6 +155,31 @@ describe Agents::WebsiteAgent do
         event.payload[:version].should == 2
         event.payload[:title].should == "first"
       end
+
+      it "stores the whole object if :extract is not specified" do
+        json = {
+          :response => {
+            :version => 2,
+            :title => "hello!"
+          }
+        }
+        stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
+        site = {
+          :name => "Some JSON Response",
+          :expected_update_period_in_days => 2,
+          :type => "json",
+          :url => "http://json-site.com",
+          :mode => :on_change
+        }
+        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
+        checker.user = users(:bob)
+        checker.save!
+
+        checker.check
+        event = Event.last
+        event.payload[:response][:version].should == 2
+        event.payload[:response][:title].should == "hello!"
+      end
     end
   end
 end
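
For reference, a rough walkthrough of what the new spec asserts, using the stubbed response above (the console-style calls are illustrative and mirror the spec rather than adding behavior):

```ruby
# Hypothetical walkthrough of the new example's expectations.
checker.check
event = Event.last
event.payload[:response][:version]   # => 2
event.payload[:response][:title]     # => "hello!"

# Because :mode is :on_change, check compares result.to_json against the
# JSON of the last UNIQUENESS_LOOK_BACK stored payloads, so re-running
# check against an unchanged response creates no additional event.
```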